from tqdm import tqdm

# User IDs to exclude from the output. Per the original note, these are users
# with more than 900 interactions.
# Each ID appears exactly once (the original literal listed 13883189723 twice).
# The list type and first-occurrence order are kept so existing `in` checks
# and any positional use keep working.
neg_user = [
    13883189723,
    18883337780,
    13658334749,
    13436136688,
    17384761663,
    13594065448,
    15508578190,
    15086808393,
    13896198816,
    15683021916,
    13996335565,
    13691497239,
    15321372291,
    15723348380,
    15887099676,
    13570954085,
    13637984165,
    18008376096,
    18611454315,
    15320502343,
    13622969011,
    13696478237,
    13186052835,
    15995781087,
    18988903698,
    13452404846,
    15111894675,
    13883491591,
    18680758987,
    15811588471,
    18223319545,
    18623649598,
    13883061675,
    19922912967,
    18996327596,
    18623130627,
    15179889337,
    13594261788,
    15736074916,
    18223358309,
    18717039259,
    13075417325,
    18202822361,
    17623057592,
    18112991396,
    18008613765,
    18551871739,
    13627688086,
    18696594037,
]
# Input locations (absolute Windows paths, written as raw strings so the
# backslashes need no escaping).
# path1: tab-separated interaction records, filtered further down.
path1 = r'F:\LCW\PycharmProject\pytorch-learn\RecSys\dataset\negSample\patent-240k.txt'
# path2: one item id per line; defines which items are kept.
path2 = r'F:\LCW\PycharmProject\pytorch-learn\RecSys\dataset\patent-100k\patent_120k_patent.txt'


# Build the set of item ids to keep, one id per line of the path2 file.
# Original note: "items with a count greater than 20" — presumably the file
# was pre-filtered that way; this code does no counting itself.
with open(path2, 'r') as g:
    data_g = g.readlines()
    # Keep only the text before the first newline of each line.
    item_list_g = {raw_line.partition('\n')[0] for raw_line in tqdm(data_g)}




# Read the initial patent-240k file (path1): one record per line, formatted as
# user<TAB>item<TAB>rating. Collect, per user, the distinct items that are in
# item_list_g, skipping every user listed in neg_user.
with open(path1, 'r') as f:
    data = f.readlines()
    user_item = dict()  # user id (str) -> kept items, in first-seen order
    item_list = set()   # never populated here; kept so the name stays defined
    # Hashed helpers so each record costs O(1) instead of scanning lists.
    excluded_users = set(neg_user)
    seen_pairs = set()
    for line in tqdm(data):
        fields = line.split('\t')
        # Skip blank or truncated lines instead of raising IndexError.
        if len(fields) < 3:
            continue
        user, item = fields[0], fields[1]  # the rating column is not used
        if item not in item_list_g:
            continue
        if int(user) in excluded_users:
            continue
        pair = (user, item)
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        user_item.setdefault(user, []).append(item)

# Write one "user<TAB>item<TAB>1" row for every item kept for each user.
# NOTE(review): the file is opened in append mode, so running the script
# again adds the same rows a second time — confirm this is intended.
with open('write.txt', 'a') as w:
    for user in tqdm(user_item):
        print(user)
        # Skip any user that is on the exclusion list.
        if int(user) in neg_user:
            continue
        w.writelines(
            '\t'.join((user, str(pos), '1')) + '\n'
            for pos in user_item[user]
        )